Explains how to generate an interactive heatmap from a SingleCellExperiment object Includes a summarizeHeatmap function that calculates the median counts by channel and by cluster (or any other variable in colData(sce)).
For more information about heatmaply, see the heatmaply vignette and the original heatmaply publication.
SingleCellExperiment, heatmaply, data.table.rownames(sce).library(SingleCellExperiment)
library(heatmaply)
Contains a subset of the pancreas dataset
fn.sce <- file.path('..', '..', '..', 'data', 'pancreas_example_sce.rds')
sce <- readRDS(fn.sce)
sce
## class: SingleCellExperiment
## dim: 38 12274
## metadata(0):
## assays(1): counts
## rownames(38): H3 SMA ... Ir191 Ir193
## rowData names(3): channel metal target
## colnames(12274): E34_1 E34_2 ... J02_4952 J02_4953
## colData names(21): slide id ... Age Gender
## reducedDimNames(0):
## spikeNames(0):
## altExpNames(0):
Returns median counts by cluster and by channel. The arguments are the following:
sce = A SingleCellExperiment object.
expr_values = A string corresponding to an assay in the sce, should be in names(sce).
cluster_by = Name of the column containing the clusters.
channels= channels to include, should be in rownames(sce). If NULL, all channels will be summarized.
summarizeHeatmap <- function(sce, expr_values, cluster_by, channels=NULL){
require(data.table)
# Argument checks
if(is.null(expr_values)){
expr_values <- names(assays(sce))[1]
print(paste0("Warning: Assay type not provided, '", expr_values, "' used."))
}
if(is.null(channels[1])){
channels <- rownames(sce)
}
if(!all(channels %in% rownames(sce))){
stop("Channel names do not correspond to the rownames of the assay")
}
if(is.null(cluster_by)){
stop("Cluster column not provided")
}
if(!cluster_by %in% colnames(colData(sce))){
stop("The cluster_by argument should correspond to a colData(sce) column")
}
# Convert the data to a melted format
dat <- as.data.table(t(assay(sce, expr_values)[channels,]))
dat[, id := colnames(sce)]
dat[, cluster := colData(sce)[, cluster_by]]
dat <- melt.data.table(dat, id.vars=c('id','cluster'), variable.name='channel', value.name=expr_values)
# Summarize the data
dat.summary <- dat[, list(
median.val = median(get(expr_values)),
mean.val = mean(get(expr_values)),
cellspercluster = .N),
by = c('channel', 'cluster')]
# Decast the summarized data and convert to a matrix. Currently only the median values are returned
hm.cell <- dcast.data.table(dat.summary, formula='cluster ~ channel', value.var='median.val')
hm.clusters <- hm.cell$cluster
hm.cell <- as.matrix(hm.cell[, -1, with=FALSE])
# Add rownames
rownames(hm.cell) <- hm.clusters
# Return the median values
return(as.matrix(hm.cell))
}
summarizeHeatmap function# Select the relevant channels
good.channels <- rownames(sce)[! rownames(sce) %in% c('H3', 'PD-1', 'cPARP', 'Ir191', 'Ir193')]
hm <- summarizeHeatmap(sce,
expr_values = 'counts',
cluster = 'CellType',
channels = good.channels)
Displays normalized, scaled or percentized counts per cluster and channel
Normalize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#normalize
Scale: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#scale
Percentize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#percentize
# Normalize
heatmaply(heatmaply::normalize(hm))
# Scale
heatmaply(hm, scale="column")
# Percentize
heatmaply(heatmaply::percentize(hm))
# Caculate transformed counts
assay(sce, "exprs") <- asinh(counts(sce) / 1)
# Summarize the data
hm.exprs <- summarizeHeatmap(sce,
expr_values = 'exprs',
cluster = 'CellType',
channels = good.channels)
# Plot the heatmap
heatmaply(heatmaply::normalize(hm.exprs))
Any other variable in colData(sce) can be passed to summarizeHeatmap.
Example with donors (case).
# Summarize the data
hm.cases <- summarizeHeatmap(sce,
expr_values = 'counts',
cluster = 'case',
channels = good.channels)
# Plot the heatmap
heatmaply(heatmaply::normalize(hm.cases))
Here, row side colors are defined in function of the cell types and cell categories (defined in colData(sce)$CellType and colData(sce)$CellCat).
# Get cell types and cell categories
rsc <- unique(data.frame(`Cell type`= colData(sce)$CellType,
`Cell category` = colData(sce)$CellCat))
# Plot the heatmap
heatmaply(heatmaply::normalize(hm), RowSideColors = rsc)
Displays correlation between channels
heatmaply_cor(cor(hm))
https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#correlation-heatmaps
r <- cor(hm)
cor.test.p <- function(x){
FUN <- function(x, y) cor.test(x, y)[["p.value"]]
z <- outer(
colnames(x),
colnames(x),
Vectorize(function(i,j) FUN(x[,i], x[,j]))
)
dimnames(z) <- list(colnames(x), colnames(x))
z
}
p <- cor.test.p(hm)
heatmaply_cor(
r,
node_type = "scatter",
point_size_mat = -log10(p),
point_size_name = "-log10(p-value)",
label_names = c("x", "y", "Correlation")
)
ggheatmap(heatmaply::normalize(hm))
The file argument can be provided to save the heatmap as an html file
# Define the name of the output html file
fn.hm <- file.path('./', "saved-heatmap.html")
# Save the heatmap to html
heatmaply(heatmaply::normalize(hm), file=fn.hm)